import numpy as np
import pandas as pd
import seaborn as sb
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.offline import iplot, plot
from plotly.subplots import make_subplots
# Location of the Netflix titles CSV. A raw string is used so that the
# backslashes (including the one in front of "netflix", which would otherwise
# form a newline escape) are taken literally; inside a raw string the
# separators must therefore NOT be doubled.
DATA_PATH = r'D:\IBM Data Science Course\netflix_titles.csv'

# Load the whole catalogue into a DataFrame and show its (rows, columns) shape.
data = pd.read_csv(DATA_PATH)
data.shape
(8807, 12)
# Report the dataset dimensions (row count, then column count) as text.
print(f"Number of Rows : {data.shape[0]} \nNumber of Columns : {data.shape[1]}")
Number of Rows : 8807 Number of Columns : 12
# List the column labels of the loaded dataset.
data.columns
Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
'release_year', 'rating', 'duration', 'listed_in', 'description'],
dtype='object')
# Per-column non-null counts and dtypes; used to spot columns with missing data.
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 8807 entries, 0 to 8806 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 show_id 8807 non-null object 1 type 8807 non-null object 2 title 8807 non-null object 3 director 6173 non-null object 4 cast 7982 non-null object 5 country 7976 non-null object 6 date_added 8797 non-null object 7 release_year 8807 non-null int64 8 rating 8803 non-null object 9 duration 8804 non-null object 10 listed_in 8807 non-null object 11 description 8807 non-null object dtypes: int64(1), object(11) memory usage: 825.8+ KB
# Summary statistics for the numeric columns (count, mean, std, quartiles).
data.describe()
| release_year | |
|---|---|
| count | 8807.000000 |
| mean | 2014.180198 |
| std | 8.819312 |
| min | 1925.000000 |
| 25% | 2013.000000 |
| 50% | 2017.000000 |
| 75% | 2019.000000 |
| max | 2021.000000 |
# Summary of the non-numeric columns: count, number of unique values,
# most frequent value and its frequency.
data.describe(exclude=np.number)
| show_id | type | title | director | cast | country | date_added | rating | duration | listed_in | description | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 8807 | 8807 | 8807 | 6173 | 7982 | 7976 | 8797 | 8803 | 8804 | 8807 | 8807 |
| unique | 8807 | 2 | 8807 | 4528 | 7692 | 748 | 1767 | 17 | 220 | 514 | 8775 |
| top | s1 | Movie | Dick Johnson Is Dead | Rajiv Chilaka | David Attenborough | United States | January 1, 2020 | TV-MA | 1 Season | Dramas, International Movies | Paranormal activity at a lush, abandoned prope... |
| freq | 1 | 6131 | 1 | 19 | 19 | 2818 | 109 | 3207 | 1793 | 362 | 4 |
# Peek at five randomly chosen rows (the selection differs on every run).
data.sample(5)
| show_id | type | title | director | cast | country | date_added | release_year | rating | duration | listed_in | description | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 3748 | s3749 | TV Show | El desconocido | Not Given | Guillermo Iván, César Manjarrez, Estrella Solí... | Not Given | June 14, 2019 | 2019 | TV-MA | 2 Seasons | Crime TV Shows, International TV Shows, Spanis... | Based on real events, the fictional story of M... |
| 7223 | s7224 | Movie | Kon-Tiki | Joachim Rønning, Espen Sandberg | Pål Sverre Hagen, Anders Baasmo Christiansen, ... | United Kingdom, Norway, Denmark, Germany, Sweden | April 26, 2019 | 2012 | PG-13 | 96 min | Action & Adventure, Dramas, International Movies | With five loyal friends in tow, explorer Thor ... |
| 6389 | s6390 | TV Show | Bure Kaam Bura Natija, Kyun Bhai Chacha Haan B... | Not Given | Not Given | Not Given | March 31, 2018 | 2017 | TV-PG | 1 Season | Kids' TV | A clever uncle-nephew duo solves mysteries, cr... |
| 2810 | s2811 | Movie | Bypass Road | Naman Nitin Mukesh | Neil Nitin Mukesh, Adah Sharma, Rajit Kapoor, ... | India | March 15, 2020 | 2019 | TV-14 | 135 min | International Movies, Thrillers | On the night his ex-lover mysteriously dies, a... |
| 4083 | s4084 | Movie | Bert Kreischer: The Machine | Ryan Polito | Bert Kreischer | United States | February 22, 2019 | 2016 | TV-MA | 70 min | Stand-Up Comedy | From his run-in with a grizzly bear to partyin... |
# First five rows of the dataset.
data.head()
| show_id | type | title | director | cast | country | date_added | release_year | rating | duration | listed_in | description | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | s1 | Movie | Dick Johnson Is Dead | Kirsten Johnson | NaN | United States | September 25, 2021 | 2020 | PG-13 | 90 min | Documentaries | As her father nears the end of his life, filmm... |
| 1 | s2 | TV Show | Blood & Water | NaN | Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban... | South Africa | September 24, 2021 | 2021 | TV-MA | 2 Seasons | International TV Shows, TV Dramas, TV Mysteries | After crossing paths at a party, a Cape Town t... |
| 2 | s3 | TV Show | Ganglands | Julien Leclercq | Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi... | NaN | September 24, 2021 | 2021 | TV-MA | 1 Season | Crime TV Shows, International TV Shows, TV Act... | To protect his family from a powerful drug lor... |
| 3 | s4 | TV Show | Jailbirds New Orleans | NaN | NaN | NaN | September 24, 2021 | 2021 | TV-MA | 1 Season | Docuseries, Reality TV | Feuds, flirtations and toilet talk go down amo... |
| 4 | s5 | TV Show | Kota Factory | NaN | Mayur More, Jitendra Kumar, Ranjan Raj, Alam K... | India | September 24, 2021 | 2021 | TV-MA | 2 Seasons | International TV Shows, Romantic TV Shows, TV ... | In a city of coaching centers known to train I... |
# Last five rows of the dataset.
data.tail()
| show_id | type | title | director | cast | country | date_added | release_year | rating | duration | listed_in | description | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8802 | s8803 | Movie | Zodiac | David Fincher | Mark Ruffalo, Jake Gyllenhaal, Robert Downey J... | United States | November 20, 2019 | 2007 | R | 158 min | Cult Movies, Dramas, Thrillers | A political cartoonist, a crime reporter and a... |
| 8803 | s8804 | TV Show | Zombie Dumb | NaN | NaN | NaN | July 1, 2019 | 2018 | TV-Y7 | 2 Seasons | Kids' TV, Korean TV Shows, TV Comedies | While living alone in a spooky town, a young g... |
| 8804 | s8805 | Movie | Zombieland | Ruben Fleischer | Jesse Eisenberg, Woody Harrelson, Emma Stone, ... | United States | November 1, 2019 | 2009 | R | 88 min | Comedies, Horror Movies | Looking to survive in a world taken over by zo... |
| 8805 | s8806 | Movie | Zoom | Peter Hewitt | Tim Allen, Courteney Cox, Chevy Chase, Kate Ma... | United States | January 11, 2020 | 2006 | PG | 88 min | Children & Family Movies, Comedies | Dragged from civilian life, a former superhero... |
| 8806 | s8807 | Movie | Zubaan | Mozez Singh | Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan... | India | March 2, 2019 | 2015 | TV-14 | 111 min | Dramas, International Movies, Music & Musicals | A scrappy but poor boy worms his way into a ty... |
# Number of missing values in each column.
data.isnull().sum()
show_id 0 type 0 title 0 director 2634 cast 825 country 831 date_added 10 release_year 0 rating 4 duration 3 listed_in 0 description 0 dtype: int64
# Replace every missing value, in every column, with the placeholder string
# 'Not Given'. The DataFrame is modified in place.
data.fillna('Not Given',inplace=True)
# Re-count the nulls per column to confirm that none remain.
data.isnull().sum()
show_id 0 type 0 title 0 director 0 cast 0 country 0 date_added 0 release_year 0 rating 0 duration 0 listed_in 0 description 0 dtype: int64
# First five rows again, to inspect the result of the preceding clean-up.
data.head(5)
| show_id | type | title | director | cast | country | date_added | release_year | rating | duration | listed_in | description | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | s1 | Movie | Dick Johnson Is Dead | Kirsten Johnson | Not Given | United States | September 25, 2021 | 2020 | PG-13 | 90 min | Documentaries | As her father nears the end of his life, filmm... |
| 1 | s2 | TV Show | Blood & Water | Not Given | Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban... | South Africa | September 24, 2021 | 2021 | TV-MA | 2 Seasons | International TV Shows, TV Dramas, TV Mysteries | After crossing paths at a party, a Cape Town t... |
| 2 | s3 | TV Show | Ganglands | Julien Leclercq | Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi... | Not Given | September 24, 2021 | 2021 | TV-MA | 1 Season | Crime TV Shows, International TV Shows, TV Act... | To protect his family from a powerful drug lor... |
| 3 | s4 | TV Show | Jailbirds New Orleans | Not Given | Not Given | Not Given | September 24, 2021 | 2021 | TV-MA | 1 Season | Docuseries, Reality TV | Feuds, flirtations and toilet talk go down amo... |
| 4 | s5 | TV Show | Kota Factory | Not Given | Mayur More, Jitendra Kumar, Ranjan Raj, Alam K... | India | September 24, 2021 | 2021 | TV-MA | 2 Seasons | International TV Shows, Romantic TV Shows, TV ... | In a city of coaching centers known to train I... |
# Count rows that are exact duplicates of an earlier row (all columns equal).
data.duplicated().sum()
0
# Count rows whose 'title' repeats the title of an earlier row.
data.duplicated(subset=['title']).sum()
0
# The free-text 'description' column is removed from the working DataFrame
# (in place); the preview below shows the remaining columns.
data.drop(columns=['description'], inplace=True)
data.head()
| show_id | type | title | director | cast | country | date_added | release_year | rating | duration | listed_in | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | s1 | Movie | Dick Johnson Is Dead | Kirsten Johnson | Not Given | United States | September 25, 2021 | 2020 | PG-13 | 90 min | Documentaries |
| 1 | s2 | TV Show | Blood & Water | Not Given | Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban... | South Africa | September 24, 2021 | 2021 | TV-MA | 2 Seasons | International TV Shows, TV Dramas, TV Mysteries |
| 2 | s3 | TV Show | Ganglands | Julien Leclercq | Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi... | Not Given | September 24, 2021 | 2021 | TV-MA | 1 Season | Crime TV Shows, International TV Shows, TV Act... |
| 3 | s4 | TV Show | Jailbirds New Orleans | Not Given | Not Given | Not Given | September 24, 2021 | 2021 | TV-MA | 1 Season | Docuseries, Reality TV |
| 4 | s5 | TV Show | Kota Factory | Not Given | Mayur More, Jitendra Kumar, Ranjan Raj, Alam K... | India | September 24, 2021 | 2021 | TV-MA | 2 Seasons | International TV Shows, Romantic TV Shows, TV ... |
# Report the dataset dimensions again (row count, then column count).
print(f"Number of Rows : {data.shape[0]}\nNumber of Columns : {data.shape[1]}")
Number of Rows : 8807 Number of Columns : 11
# Number of titles per content type, most frequent type first.
type_count = data['type'].value_counts()
type_count
type Movie 6131 TV Show 2676 Name: count, dtype: int64
# The distinct type labels, in the same order as the counts above.
type_count.index
Index(['Movie', 'TV Show'], dtype='object', name='type')
# Bar chart of the number of titles per type. Each type gets a fixed colour so
# that it is drawn the same way regardless of ordering.
type_palette = {'Movie': '#189ad3', 'TV Show': '#e9724d'}
barChart = px.bar(
    type_count,
    title='Type Wise Count of Titles',
    labels={'type': 'Type', 'value': 'Count'},
    color=type_count.index,
    color_discrete_map=type_palette,
    text_auto=True,  # print the count on each bar
)
barChart.show()
# Pie chart of the share of titles per type.
# The sector labels are taken from type_count.index rather than a hard-coded
# list, so a label can never be attached to the wrong count if the ordering
# of value_counts() changes (e.g. on a different snapshot of the data).
pieChart = px.pie(
    values=type_count,
    names=type_count.index,
    title='Type Wise Count of Titles',
    color=type_count.index,
    color_discrete_map={'Movie': '#189ad3', 'TV Show': '#e9724d'},
)
# Show the label, the absolute count and the percentage on every sector.
pieChart.update_traces(textinfo='label+value+percent')
pieChart.show()
# Distinct values found in the 'rating' column. Useful for spotting entries
# that are not ratings at all (e.g. duration strings such as '74 min').
data['rating'].unique()
array(['PG-13', 'TV-MA', 'PG', 'TV-14', 'TV-PG', 'TV-Y', 'TV-Y7', 'R',
'TV-G', 'G', 'NC-17', '74 min', '84 min', '66 min', 'NR', nan,
'TV-Y7-FV', 'UR'], dtype=object)
# Number of titles for each value of the 'rating' column, ordered by rating label.
ratings = data.groupby('rating').size()
print(ratings)
rating 66 min 1 74 min 1 84 min 1 G 41 NC-17 3 NR 80 PG 287 PG-13 490 R 799 TV-14 2160 TV-G 220 TV-MA 3207 TV-PG 863 TV-Y 307 TV-Y7 334 TV-Y7-FV 6 UR 3 dtype: int64
# Three entries in the 'rating' column are duration strings, not maturity
# ratings: '66 min', '74 min' and '84 min' (one title each). Remove all three,
# not just one of them, before plotting.
# errors='ignore' keeps this cell safe to re-run after the labels are gone.
INVALID_RATINGS = ['66 min', '74 min', '84 min']
ratings = ratings.drop(INVALID_RATINGS, errors='ignore')
ratings
rating G 41 NC-17 3 NR 80 PG 287 PG-13 490 R 799 TV-14 2160 TV-G 220 TV-MA 3207 TV-PG 863 TV-Y 307 TV-Y7 334 TV-Y7-FV 6 UR 3 dtype: int64
# Bar chart of the number of titles per maturity rating, one colour per rating.
ratings_bar = px.bar(
    ratings,
    title='Distribution of Content based on Maturity Ratings',
    labels={'rating': 'Rating', 'value': 'No. of Titles'},
    color=ratings.index,
    text_auto=True,  # print the count on each bar
)
ratings_bar.show()
# Number of titles per value of the 'country' column, most frequent first.
country_counts = data['country'].value_counts()

# Missing countries were replaced by the placeholder 'Not Given' earlier; it is
# not a country, so drop it before ranking (otherwise it would appear among the
# top entries). errors='ignore' tolerates the placeholder being absent.
df = country_counts.drop('Not Given', errors='ignore')

# Ten most frequent country values.
top10 = df.head(10)
top10
country United States 2818 India 972 United Kingdom 419 Japan 245 South Korea 199 Canada 181 Spain 145 France 124 Mexico 110 Egypt 106 Name: count, dtype: int64
# Bar chart of the ten most frequent countries, one colour per country.
bar = px.bar(
    top10,
    title='Top 10 Countries based on number of titles',
    labels={'country': 'Country', 'value': 'No. of Titles'},
    color=top10.index,
    text_auto=True,  # print the count on each bar
)
bar.show()
# Pie chart of the ten most frequent countries.
# `names` supplies the country for every sector; without it the sectors and the
# legend carry no country labels, and since textinfo shows only value and
# percentage the chart could not be read.
pie = px.pie(
    values=top10,
    names=top10.index,
    title='Top 10 Countries based on number of titles',
    color=top10.index,
    color_discrete_sequence=px.colors.sequential.Plasma,
)
# Show the absolute count and the percentage on every sector.
pie.update_traces(textinfo='value+percent')
pie.show()
# Titles with no recorded director carry the placeholder 'Not Given' after the
# missing-value fill. Exclude them, otherwise the placeholder would be ranked
# as the most prolific "director".
has_director = data['director'] != 'Not Given'

# Number of titles per value of the 'director' column (a value may list
# several co-directors), sorted from most to fewest titles.
world_directors = data[has_director].groupby('director').size()
sorted_dirs = world_directors.sort_values(ascending=False)
sorted_dirs
director
Rajiv Chilaka 19
Raúl Campos, Jan Suter 18
Suhas Kadav 16
Marcus Raboy 16
Jay Karas 14
..
Jos Humphrey 1
Jose Gomez 1
Jose Javier Reyes 1
Joseduardo Giordano, Sergio Goyri Jr. 1
Khaled Youssef 1
Length: 4528, dtype: int64
# Keep only the first ten entries of the already-sorted director counts.
top10_dirs = sorted_dirs.iloc[:10]
top10_dirs
director Rajiv Chilaka 19 Raúl Campos, Jan Suter 18 Suhas Kadav 16 Marcus Raboy 16 Jay Karas 14 Cathy Garcia-Molina 13 Jay Chapman 12 Youssef Chahine 12 Martin Scorsese 12 Steven Spielberg 11 dtype: int64
# Horizontal bar chart of the ten directors with the most titles.
top10_dirs_bar = px.bar(
    top10_dirs,
    title="Top 10 Directors in World",
    labels={'director': 'Director', 'value': 'Total Content'},
    color=top10_dirs.index,
    orientation='h',
    text_auto=True,  # print the count on each bar
)
top10_dirs_bar.show()
# Titles whose 'country' value is exactly 'India' and that have a known director.
# NOTE: the country test is an exact match, so titles whose country field lists
# several countries (comma-separated) are not included.
is_india = data['country'] == 'India'
director_known = data['director'] != 'Not Given'
indian_content = data[is_india & director_known]

# Titles per director within that subset, most prolific first; keep the top ten.
indian_directors = indian_content.groupby('director').size()
sorted_ind_dir = indian_directors.sort_values(ascending=False)
top10_ind_dir = sorted_ind_dir.head(10)
top10_ind_dir.head(10)
director David Dhawan 9 Ram Gopal Varma 7 Imtiaz Ali 6 Sooraj R. Barjatya 6 Anees Bazmee 6 Rajkumar Santoshi 6 Anurag Kashyap 5 Prakash Jha 5 Umesh Mehra 5 Madhur Bhandarkar 5 dtype: int64
# Horizontal bar chart of the ten Indian directors with the most titles,
# coloured with the reversed yellow-orange-red sequential palette.
top_ind_dir_bar = px.bar(
    top10_ind_dir,
    title='Top 10 Indian Directors',
    labels={'director': 'Director', 'value': 'Total Content'},
    color=top10_ind_dir.index,
    color_discrete_sequence=px.colors.sequential.YlOrRd_r,
    orientation='h',
    text_auto=True,  # print the count on each bar
)
top_ind_dir_bar.show()
# 'listed_in' holds a comma-separated list of categories per title. Split it
# into one column per position so every category sits in its own cell.
all_cat = data['listed_in'].str.split(', ',expand=True)
# Stack all of those columns into a single 'category' column. dropna() removes
# the empty cells of titles that have fewer categories than the widest row.
b = all_cat.melt(value_name='category').dropna()
# Ten most frequent categories across all titles.
top10_cat = b['category'].value_counts().head(10)
top10_cat
category International Movies 2752 Dramas 2427 Comedies 1674 International TV Shows 1351 Documentaries 869 Action & Adventure 859 TV Dramas 763 Independent Movies 756 Children & Family Movies 641 Romantic Movies 616 Name: count, dtype: int64
# Horizontal bar chart of the ten most frequent categories (Plasma palette).
top10_cat_bar = px.bar(
    top10_cat,
    title='Top 10 Popular Categories',
    labels={'category': 'Category', 'value': 'No. of Shows'},
    color=top10_cat.index,
    color_discrete_sequence=px.colors.sequential.Plasma,
    orientation='h',
    text_auto=True,  # print the count on each bar
)
top10_cat_bar.show()
# Number of titles per release year, ordered by year.
release_year = data.groupby('release_year').size()
release_year
release_year
1925 1
1942 2
1943 3
1944 3
1945 4
...
2017 1032
2018 1147
2019 1030
2020 953
2021 592
Length: 74, dtype: int64
# Area chart of the number of titles per release year: the year (the index of
# the counts) on the x axis, the count on the y axis.
year_area = px.area(
    release_year,
    x=release_year.index,
    y=release_year,
    labels={'release_year': 'Release Year', 'y': 'No. of Shows'},
)
iplot(year_area)
# All titles ordered from the most recent release year to the oldest.
shows = data.sort_values(by="release_year", ascending=False)
# Titles per release year. At this point the counts cover every year present in
# the data; nothing here restricts them to the last ten years.
last10years = shows.groupby("release_year").size()
shows
| show_id | type | title | director | cast | country | date_added | release_year | rating | duration | listed_in | description | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 693 | s694 | Movie | Ali & Ratu Ratu Queens | Lucky Kuswandi | Iqbaal Ramadhan, Nirina Zubir, Asri Welas, Tik... | NaN | June 17, 2021 | 2021 | TV-14 | 101 min | Comedies, Dramas, International Movies | After his father's passing, a teenager sets ou... |
| 781 | s782 | Movie | Black Holes | The Edge of All We Know | Peter Galison | NaN | NaN | June 2, 2021 | 2021 | TV-14 | 99 min | Documentaries | Follow scientists on their quest to understand... |
| 762 | s763 | Movie | Sweet & Sour | Lee Kae-byeok | Jang Ki-yong, Chae Soo-bin, Jung Soo-jung | South Korea | June 4, 2021 | 2021 | TV-14 | 103 min | Comedies, International Movies, Romantic Movies | Faced with real-world opportunities and challe... |
| 763 | s764 | TV Show | Sweet Tooth | NaN | Nonso Anozie, Christian Convery, Adeel Akhtar,... | United States | June 4, 2021 | 2021 | TV-14 | 1 Season | TV Action & Adventure, TV Dramas, TV Sci-Fi & ... | On a perilous adventure across a post-apocalyp... |
| 764 | s765 | Movie | Trippin' with the Kandasamys | Jayan Moodley | Jailoshini Naidoo, Maeshni Naicker, Madhushan ... | South Africa | June 4, 2021 | 2021 | TV-14 | 94 min | Comedies, International Movies, Romantic Movies | To rekindle their marriages, best friends-turn... |
| 765 | s766 | Movie | Xtreme | Daniel Benmayor | Teo García, Óscar Jaenada, Óscar Casas, Andrea... | Spain | June 4, 2021 | 2021 | TV-MA | 112 min | Action & Adventure, International Movies | In this fast-paced and action-packed thriller,... |
| 766 | s767 | Movie | Alan Saldaña: Locked Up | Alex Díaz | Alan Saldaña | Mexico | June 3, 2021 | 2021 | TV-MA | 49 min | Stand-Up Comedy | Mexican comedian Alan Saldaña is back, poking ... |
| 767 | s768 | TV Show | Creator's File: GOLD | NaN | Ryuji Akiyama, Ryusei Yokohama, Yumi Adachi, A... | Japan | June 3, 2021 | 2021 | TV-14 | 1 Season | International TV Shows, TV Comedies | Comedian Ryuji Akiyama satirizes top “creators... |
| 768 | s769 | Movie | Dancing Queens | Helena Bergström | Molly Nutley, Fredrik Quiñones, Marie Göranzon... | Sweden | June 3, 2021 | 2021 | TV-MA | 111 min | Comedies, Dramas, International Movies | A dancer who gets a job cleaning at a struggli... |
| 770 | s771 | Movie | Myriam Fares: The Journey | Myriam Fares | Myriam Fares | United Arab Emirates | June 3, 2021 | 2021 | TV-14 | 72 min | Documentaries, International Movies, Music & M... | From pregnancy to album preparations, Lebanese... |